In [12]:
import pandas as pd
import numpy as np
import re
import os 
from datetime import datetime

import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=False)

Get Data

In [13]:
# Cities to load; each name is joined onto data_dir as a sub-directory when files are collected.
cities = [
    'Amsterdam',
    'LA',
    'Melbourne',
    'NYC',
    'Rio',
    'Tokyo',
    'Venice',
]
# Root folder of the CSV files, resolved against the current working directory.
data_dir = ''.join((os.getcwd(), '/data/'))  #/data/ or /data-sample/
In [14]:
def get_dir(dir, group):
    '''Collect the paths of all regular files found directly inside each sub-directory.

    Args:
        dir: Base directory that contains one sub-directory per entry of ``group``.
        group: Iterable of sub-directory names to scan.

    Returns:
        list of str: ``os.path.join(dir, g, file)`` for every regular file, grouped
        in the order of ``group`` and sorted by file name within each sub-directory.
        Nested directories are skipped, not descended into.

    Raises:
        OSError: Propagated from ``os.listdir`` when a sub-directory cannot be
            listed (for example ``FileNotFoundError`` if it does not exist).
    '''
    file_dirs = []
    for g in group:
        sub_dir = os.path.join(dir, g)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # result (and anything built from it) reproducible across runs.
        for file in sorted(os.listdir(sub_dir)):
            path = os.path.join(sub_dir, file)
            if os.path.isfile(path):
                file_dirs.append(path)
    return file_dirs

def read_data(file_dirs, group):
    '''Read every CSV file and tag each frame with its snapshot date and city.

    Args:
        file_dirs: Iterable of CSV file paths. The text between the last
            underscore of the file name and the ``.csv`` extension must be a
            date in ``%y-%m-%d`` form (e.g. ``Amsterdam_20-03-13.csv``).
        group: Iterable of city names used to label each file.

    Returns:
        list of DataFrame: One frame per path, in input order, each with two
        added columns: ``date`` (the parsed datetime) and ``city``.

    Raises:
        ValueError: If the date text in a file name does not match ``%y-%m-%d``.
    '''
    group = list(group)  # allow one-shot iterables; membership is tested per file
    dfs = []
    for file_dir in file_dirs:
        frame = pd.read_csv(file_dir)

        # Parse the date from the file name only, so underscores or "csv"
        # elsewhere in the directory path cannot corrupt it.
        file_name = os.path.basename(file_dir)
        stem = re.sub(r'\.csv.*$', '', file_name)
        date = stem.rsplit('_', 1)[-1]

        # Prefer an exact match on the containing folder: a substring search
        # over the whole path would also hit unrelated components (a home
        # directory containing "LA", say) and concatenate every hit.
        parent = os.path.basename(os.path.dirname(file_dir))
        if parent in group:
            city = parent
        else:
            # Fallback keeps the previous behaviour for other layouts.
            city = ''.join(g for g in group if g in file_dir)

        #add date/city markers
        frame['date'] = datetime.strptime(date, '%y-%m-%d')
        frame['city'] = city
        dfs.append(frame)
    return dfs
In [15]:
# One frame per CSV file found under each city's folder of data_dir.
dfs = read_data(file_dirs=get_dir(dir=data_dir, group=cities), group=cities)

#merge all
# Stack the per-file frames row-wise under a fresh 0..n-1 index.
df = pd.concat(objs=dfs, axis='index', ignore_index=True, sort=False)
print("Dataframe shape:{}".format(df.shape))
df.head()
Dataframe shape:(778688, 18)
Out[15]:
id name host_id host_name neighbourhood_group neighbourhood latitude longitude room_type price minimum_nights number_of_reviews last_review reviews_per_month calculated_host_listings_count availability_365 date city
0 2818 Quiet Garden View Room & Super Fast WiFi 3159 Daniel NaN Oostelijk Havengebied - Indische Buurt 52.36575 4.94142 Private room 59 3 278 2020-02-14 2.08 1 81 2020-03-13 Amsterdam
1 20168 Studio with private bathroom in the centre 1 59484 Alexander NaN Centrum-Oost 52.36509 4.89354 Private room 100 1 331 2020-03-11 2.71 2 164 2020-03-13 Amsterdam
2 25428 Lovely apt in City Centre (w.lift) near Jordaan 56142 Joan NaN Centrum-West 52.37297 4.88339 Entire home/apt 125 14 5 2020-02-09 0.19 1 132 2020-03-13 Amsterdam
3 27886 Romantic, stylish B&B houseboat in canal district 97647 Flip NaN Centrum-West 52.38761 4.89188 Private room 155 2 217 2020-03-02 2.18 1 151 2020-03-13 Amsterdam
4 28871 Comfortable double room 124245 Edwin NaN Centrum-West 52.36719 4.89092 Private room 75 2 329 2020-03-01 2.83 3 146 2020-03-13 Amsterdam
In [16]:
# Row count per city label in the merged frame.
df.loc[:, 'city'].value_counts()
Out[16]:
NYC          203853
LA           156943
Rio          141851
Melbourne    102723
Amsterdam     79361
Tokyo         58804
Venice        35153
Name: city, dtype: int64

Draw Mapbox with Plotly Express

In [17]:
def mapbox_express(dataframe):
    '''Show an animated Plotly Express Mapbox scatter plot for one frame of listings.

    Points are placed by ``latitude``/``longitude``, coloured by ``room_type``,
    sized by ``price``, and animated over the values of ``date``. The figure
    title is the ``city`` value of the first row.

    Args:
        dataframe: DataFrame with the columns ``latitude``, ``longitude``,
            ``room_type``, ``price``, ``date`` and ``city``.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        FileNotFoundError: If ``.mapbox_token`` is missing from the working directory.
        IndexError: If ``dataframe`` has no rows (the title is read from row 0).
    '''
    # Close the token file deterministically and drop surrounding whitespace:
    # a trailing newline left by an editor is not part of the token.
    with open(".mapbox_token") as token_file:
        px.set_mapbox_access_token(token_file.read().strip())

    # NOTE(review): color_continuous_scale applies to numeric colour columns;
    # room_type looks categorical in the displayed sample, so this argument
    # may have no effect here — confirm before removing.
    fig = px.scatter_mapbox(dataframe, lat="latitude", lon="longitude", color="room_type", size="price", title=dataframe['city'].values[0],
                  color_continuous_scale=px.colors.cyclical.IceFire, size_max=15, zoom=10,animation_frame="date")
    fig.show()
In [18]:
# Draw one animated map per city, with frames ordered by snapshot date.
for city in cities:
    temp_df = (
        df.loc[df['city'] == city]
        # the date column is rendered as text before being used as the animation frame
        .assign(date=lambda frame: frame['date'].astype(str))
        .sort_values(by=['date'])
    )
    mapbox_express(temp_df)